library(redivis)
library(tidyverse)
library(tictoc)
library(fuzzyjoin)
library(kableExtra)
library(scales)


# Restore the saved objects this script works from. `candidate_partisanship`
# and `california` are used below without being defined in this file, so they
# are presumably the objects stored in these two files -- confirm against the
# scripts that write them.
# NOTE(review): load() reads files written by save(); the ".Rds" extension
# conventionally denotes saveRDS() output -- confirm how these were written.
load("control_data/candidate_partisanship.Rds")
load("california.Rds")

# Harmonise the one occupation label whose capitalisation differs from the
# rest. The replacement must be exactly "Non-Profit Worker", because that is
# the level name used wherever `label` is converted to a factor further down;
# the previous replacement ("Non-profit Worker") matched no level, so any
# recoded rows would have turned into NA in the figure and the table.
tot_data <- candidate_partisanship |>
  mutate(label = ifelse(label == "Non-profit worker",
                        "Non-Profit Worker",
                        label))

# 53.6% of sample identified in voter file
# (rows of the voter-file data relative to rows of `california`).
nrow(tot_data) / nrow(california)

# Democratic share of each occupation category, smallest share first.
# Registrations other than Democratic/Republican are pooled into
# "Non-Partisan/Other" before the within-category shares are computed.
tot_data |>
  count(
    label,
    party_clean = ifelse(Party %in% c("Democratic", "Republican"),
                         Party,
                         "Non-Partisan/Other")
  ) |>
  group_by(label) |>
  mutate(prop = prop.table(n)) |>
  filter(party_clean == "Democratic") |>
  arrange(prop)


# Figure: party registration by occupation category ----
# One bar per category, split into the within-category share of each
# registration group. The text layer prints each category's row count in
# `tot_data` at the base of its bar.
tot_data |>
  count(
    label,
    party_clean = ifelse(Party %in% c("Democratic", "Republican"),
                         Party,
                         "Non-Partisan/Other")
  ) |>
  group_by(label) |>
  mutate(
    prop = prop.table(n),
    # Factor levels fix the order of the fill groups ...
    party_clean = factor(
      party_clean,
      levels = c("Non-Partisan/Other", "Republican", "Democratic")
    ),
    # ... and the order of the categories along the x axis.
    label = factor(
      label,
      levels = c(
        "Military or Law Enforcement",
        "Laborer/Worker",
        "Business Owner/Executive",
        "Business Employee",
        "Technical Professional",
        "Politician or Staff Member",
        "Service Based Professional",
        "Lawyer",
        "Non-Profit Worker"
      )
    )
  ) |>
  ggplot(aes(x = label)) +
  geom_bar(
    aes(y = prop, fill = party_clean),
    stat = "identity",
    position = "fill",
    width = 0.6
  ) +
  scale_fill_manual(
    "Party Registration",
    values = c(
      "Democratic" = "blue",
      "Republican" = "red",
      "Non-Partisan/Other" = "grey"
    )
  ) +
  scale_y_continuous(labels = scales::percent) +
  labs(x = "", y = "") +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 40, vjust = 0.5, size = 18, colour = "black"),
    axis.text.y = element_text(colour = "black", size = 14),
    text = element_text(size = 14),
    legend.text = element_text(size = 18),
    legend.title = element_text(size = 18),
    plot.title = element_text(size = 18)
  ) +
  # Per-category sample sizes, tabulated from the unaggregated data.
  geom_text(
    data = data.frame(xtabs(~label, data = tot_data)) |>
      mutate(Freq = paste0("n = ", Freq)),
    aes(label = Freq, y = 0),
    position = position_dodge(width = 0.9),
    vjust = 1.2,
    size = 6
  )

# No `plot` argument is given, so ggsave() writes the plot built just above.
ggsave(
  filename = "figures/partisanship_complete.png",
  height = 10,
  width = 15,
  bg = "white"
)



# Table data: counts and within-category shares of each registration group ----
# One row per occupation category with (n, share) column pairs in the order
# Democratic, Republican, Non-Partisan/Other. The share columns are stored as
# formatted percent strings, so they are for display rather than arithmetic.
# The result stays grouped by `label`, as before.
#
# Columns are picked by name rather than by position: pivot_wider() orders its
# new columns by first appearance in the data, so positional indices would
# silently pick the wrong columns if the first category lacked one of the
# three registration groups.
party_table <- tot_data |>
  mutate(party_clean =
           ifelse(Party %in% c("Democratic", "Republican"), Party, "Non-Partisan/Other")
  ) |>
  count(label, party_clean) |>
  group_by(label) |>
  mutate(prop = n / sum(n),
         party_clean = factor(party_clean, levels = c("Non-Partisan/Other",
                                                      "Republican",
                                                      "Democratic")),
         label = factor(label, levels = c("Military or Law Enforcement",
                                          "Laborer/Worker",
                                          "Business Owner/Executive",
                                          "Business Employee",
                                          "Technical Professional",
                                          "Politician or Staff Member",
                                          "Service Based Professional",
                                          "Lawyer",
                                          "Non-Profit Worker"
         ))) |>
  pivot_wider(names_from = party_clean, values_from = c(n, prop)) |>
  select(
    label,
    n_Democratic, prop_Democratic,
    n_Republican, prop_Republican,
    `n_Non-Partisan/Other`, `prop_Non-Partisan/Other`
  ) |>
  # across() replaces the superseded mutate_at(); only the share columns match.
  mutate(across(starts_with("prop_"), \(x) scales::percent(x, accuracy = .01)))

# LaTeX table of the registration breakdown, least-Democratic category first.
# `prop_Democratic` holds formatted strings such as "9.50%", so it is parsed
# back to a number for sorting; ordering the strings themselves would place
# "10.20%" ahead of "9.50%".
party_table |>
  arrange(parse_number(prop_Democratic)) |>
  kbl(align = 'l',
      col.names = c("Category", "n", "% of Category", "n", "% of Category", "n", "% of Category"),
      format = "latex") |>
  kableExtra::kable_styling() |>
  # Spanning header: one blank cell over "Category", then one label per
  # (n, share) column pair.
  add_header_above(c(" " = 1, "Democratic" = 2, "Republican" = 2, "Non-Partisan/Other" = 2))



# Chi-squared test of independence between registration group and occupation
# category, using the same three-way party recode as the figure and table.
contingency_table <- tot_data |>
  mutate(
    party_clean = ifelse(Party %in% c("Democratic", "Republican"),
                         Party,
                         "Non-Partisan/Other")
  ) |>
  with(base::table(party_clean, label))
chisq.test(contingency_table)





# Same exercise with the local elections database ----
# Keep California city-council rows and reduce them to the merge keys used
# below (LAST, YEAR, place_fips) plus `pid_est`.
# NOTE(review): substr(fips, 3, 7) assumes `fips` is read as text with a
# two-character prefix ahead of the place code -- confirm how read_csv()
# parses this column.
ledb <- read_csv("control_data/ledb_candidatelevel.csv") |>
  filter(state_abb == "CA", office_consolidated == "City Council") |>
  mutate(
    place_fips = substr(fips, 3, 7),
    LAST = str_to_title(lastname),
    YEAR = factor(year)
  ) |>
  select(LAST, YEAR, place_fips, pid_est)

# Link sample candidates to LEDB rows on last name, election year and place.
# merge() with its default settings keeps only rows whose keys occur in both
# tables.
ca_ledb_merge <- merge(
  x = select(california, FIRST, LAST, YEAR, label, place_fips),
  y = ledb,
  by = c("LAST", "YEAR", "place_fips")
)

# Figure: LEDB `pid_est` by occupation category ----
# One bar per category, split into the within-category share of each party
# group. `pid_est` values "D" and "R" map to Democratic and Republican; every
# other value, including NA, falls into "Non-Partisan/Other". The text layer
# prints each category's row count in `ca_ledb_merge` at the base of its bar.
ca_ledb_merge |>
  count(label, pid_est) |>
  group_by(label) |>
  mutate(
    prop = n / sum(n),
    party_clean = case_when(
      pid_est == "D" ~ "Democratic",
      pid_est == "R" ~ "Republican",
      # Spelled out as TRUE: `T` is an ordinary variable that can be reassigned.
      TRUE ~ "Non-Partisan/Other"
    ),
    # Factor levels fix the order of the fill groups ...
    party_clean = factor(
      party_clean,
      levels = c("Non-Partisan/Other", "Republican", "Democratic")
    ),
    # ... and the order of the categories along the x axis.
    label = factor(
      label,
      levels = c(
        "Military or Law Enforcement",
        "Business Employee",
        "Laborer/Worker",
        "Business Owner/Executive",
        "Technical Professional",
        "Politician or Staff Member",
        "Service Based Professional",
        "Lawyer",
        "Non-Profit Worker"
      )
    )
  ) |>
  ggplot(aes(x = label)) +
  geom_bar(position = "fill",
           stat = "identity",
           aes(y = prop, fill = party_clean),
           width = 0.6) +
  scale_fill_manual(
    "Party Registration",
    values = c(
      "Democratic" = "blue",
      "Republican" = "red",
      "Non-Partisan/Other" = "grey"
    )
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 40, vjust = 0.5, size = 18, colour = "black"),
        axis.text.y = element_text(colour = "black", size = 14),
        text = element_text(size = 14),
        legend.text = element_text(size = 18),
        legend.title = element_text(size = 18),
        plot.title = element_text(size = 18)) +
  scale_y_continuous(labels = scales::percent) +
  ylab("") +
  xlab("") +
  # Per-category sample sizes, tabulated from the unaggregated merged data.
  geom_text(data = data.frame(xtabs(~label, data = ca_ledb_merge)) |>
              mutate(Freq = paste0("n = ", Freq)),
            aes(label = Freq, y = 0),
            position = position_dodge(width = 0.9),
            vjust = 1.2,
            size = 6)

# No `plot` argument is given, so ggsave() writes the plot built just above.
ggsave(filename = "figures/app_ledb_partisanship_complete.png", height = 10, width = 15, bg = "white")

# Chi-squared test of independence between `pid_est` and occupation category
# in the LEDB-merged data. Missing values are tabulated as their own level
# whenever any are present.
contingency_table <- ca_ledb_merge |>
  with(base::table(pid_est, label, useNA = "ifany"))
chisq.test(contingency_table)

# Rows in the merged data relative to rows in `california`.
nrow(ca_ledb_merge) / nrow(california)

